# 加载必要的包
#if (!require(caret)) install.packages("caret")
#if (!require(pROC)) install.packages("pROC")
#if (!require(MatchIt)) install.packages("MatchIt")

library(caret)
library(pROC)
library(MatchIt)
library(dplyr)
library(nnet)

# Switch to the project directory and read the modelling data set.
# NOTE(review): the directory string looks like a template placeholder that is
# presumably substituted before the script runs -- confirm.
setwd("${path}")

mydata <- read.csv("bibliometric_nuomotu.csv")

# Columns 3 through 38 are the predictors used by the matching and model
# formulas further down. Both name vectors are echoed for inspection.
cols <- names(mydata)[3:38]
names(mydata)
cols

# Predictor names wrapped in single quotes and joined with commas, plus the
# same names joined with "+" to form the right-hand side of a formula.
colsdou <- paste0("'", cols, "'")
independent_and <- paste(cols, collapse = "+")
independent_dou <- paste(colsdou, collapse = ",")

# Recode the outcome as a two-level factor: "0" -> "event_0", "1" -> "event_1".
mydata$group_best <- factor(
  mydata$group_best,
  levels = c("0", "1"),
  labels = c("event_0", "event_1")
)

# 1:3 nearest-neighbour matching of the outcome groups on every predictor.
m.out <- matchit(
  as.formula(paste("group_best", independent_and, sep = "~")),
  data = mydata,
  method = "nearest",
  ratio = 3
)

# Extract the matched sample from the matchit result.
matched_data <- match.data(m.out)

# Fit the neural network on the matched sample, using the same outcome and
# predictors as the matching step.
nn_formula <- as.formula(paste("group_best", independent_and, sep = "~"))
model_nn <- nnet(
  nn_formula,
  data = matched_data,
  size = 5,     # hidden-layer size
  decay = 0.1,  # regularisation (weight decay) parameter
  maxit = 1000  # maximum number of iterations
)

# ---- Predict and evaluate on the matched (modelling) data ----
# predictions_nn is kept in the matrix form returned by predict(), because the
# ROC code later in this script indexes it as predictions_nn[, 1].
predictions_nn <- predict(model_nn, newdata = matched_data, type = "raw")

# Flatten to a plain numeric vector so the column added below is an ordinary
# data-frame column rather than an embedded one-column matrix.
predicted_probs_nn <- as.numeric(predictions_nn[, 1])

# Classify with a 0.5 probability cut-off.
predicted_classes_nn <- factor(
  ifelse(predicted_probs_nn >= 0.5, "event_1", "event_0"),
  levels = c("event_0", "event_1")
)

# Attach the predictions to the matched data.
mydata_with_predictions_nn <- matched_data %>%
  mutate(
    group_best_probability = predicted_probs_nn,
    group_best_predicted_class = predicted_classes_nn,
    group_best_score = NA  # the neural network has no scorecard
  )

# Count the rows whose observed outcome equals the predicted class.
matching_rows <- mydata_with_predictions_nn %>%
  filter(group_best == group_best_predicted_class) %>%
  nrow()
cat("Number of rows where ${hotspot} and ${hotspot}_predicted_class match:", matching_rows, "\n")

# Accuracy = matching rows / total rows. The label now says "Accuracy"; it
# used to repeat the row-count label above.
total_rows <- nrow(matched_data)
accuracy <- matching_rows / total_rows
cat("Accuracy (matching rows / total rows) for ${hotspot}:", accuracy, "\n")
# These figures are computed on the modelling set itself.

write.csv(mydata_with_predictions_nn, file = "mydata_with_predictions.csv", row.names = FALSE)

# Diagnostics for the modelling-set predictions: confusion matrix, ROC curve
# and calibration plot. Wrapped in try() so that a failure here is reported
# but does not stop the rest of the script.
try({
  #### Confusion matrix ####
  conf_matrix <- confusionMatrix(data = predicted_classes_nn, reference = matched_data$group_best)

  # capture.output() restores the output stream itself even if print() fails,
  # so later console output can never stay diverted into the text file.
  capture.output(print(conf_matrix), file = "confusionMatrix_nn.txt")

  #### ROC curve ####
  png(file = "roc_nn.png", width = 800, height = 800)
  roc_curve <- tryCatch(
    roc(
      matched_data$group_best, predictions_nn[, 1],
      # Pin control/case levels and direction so a model that performs worse
      # than chance is not silently flipped by the automatic direction choice.
      levels = c("event_0", "event_1"), direction = "<",
      plot = TRUE, print.auc = TRUE, col = "darkgreen", lwd = 2, main = "ROC Curve"
    ),
    finally = dev.off()  # always close the PNG device, even on error
  )

  #### Calibration plot ####
  # Relate the observed event rate to the predicted probability.
  calib_data <- mydata_with_predictions_nn %>%
    mutate(
      # The probability column may be a one-column matrix (predict() output);
      # flatten it before binning.
      group_best_probability = as.numeric(group_best_probability),
      bin = ntile(group_best_probability, 10)  # ten equal-sized probability bins
    ) %>%
    group_by(bin) %>%
    summarise(
      mean_pred = mean(group_best_probability, na.rm = TRUE),        # mean predicted probability per bin
      actual_rate = mean(as.numeric(group_best) - 1, na.rm = TRUE),  # observed event rate per bin
      .groups = 'drop'
    )

  # NOTE(review): ggplot() is used although this script never attaches ggplot2
  # itself; presumably it is made available by library(caret) -- confirm.
  calib_plot <- ggplot(calib_data, aes(x=mean_pred, y=actual_rate)) +
    geom_point(size=3) +                                                 # one point per bin
    geom_line(linetype="dashed", size=1) +                               # connect the bins
    geom_abline(intercept=0, slope=1, linetype="solid", color="red") +   # ideal calibration line
    labs(title="Calibration Plot for Neural Network Model",
         x="Predicted Probability",
         y="Actual Rate") +
    theme_minimal()

  png(file = "cal_nn.png", width = 800, height = 800)
  # A ggplot object is only drawn when printed. Inside this braced block there
  # is no auto-printing, so without print() the PNG is written without the plot.
  tryCatch(
    print(calib_plot),
    finally = dev.off()  # always close the PNG device, even on error
  )

})


# ---- Score the outcome data set with the trained network ----
mydata1 <- read.csv("bibliometric_nuomotuRes.csv")

# Fail early with a clear message if the new file lacks any predictor column
# the model formula was built from.
missing_cols <- setdiff(cols, colnames(mydata1))
if (length(missing_cols) > 0) {
  stop(
    "bibliometric_nuomotuRes.csv is missing predictor column(s): ",
    paste(missing_cols, collapse = ", "),
    call. = FALSE
  )
}

# Predict. This overwrites the prediction objects created for the modelling
# set.
predictions_nn <- predict(model_nn, newdata = mydata1, type = "raw")

# Flatten to a plain numeric vector so the column added below is an ordinary
# data-frame column rather than an embedded one-column matrix.
predicted_probs_nn <- as.numeric(predictions_nn[, 1])

# Classify with a 0.5 probability cut-off.
predicted_classes_nn <- factor(
  ifelse(predicted_probs_nn >= 0.5, "event_1", "event_0"),
  levels = c("event_0", "event_1")
)

# Attach the predictions to the outcome data.
result_with_predictions_nn <- mydata1 %>%
  mutate(
    group_best_probability = predicted_probs_nn,
    group_best_predicted_class = predicted_classes_nn,
    group_best_score = NA  # the neural network has no scorecard
  )
# These are the results for the outcome data set.
write.csv(result_with_predictions_nn, file = "result_with_predictions.csv", row.names = FALSE)


